Introduction

Load Data

# Location of the raw publication export, relative to this document.
input.file <- "../data/publications-SDCC-2021.txt"

# Read the export without a header row and keep text as character vectors
# (no conversion to factors).
data.loaded <-
  utils::read.delim( input.file, header = FALSE, encoding = "UTF-8", stringsAsFactors = FALSE )

Prepare the Data

Create a Data Frame

  • Create a data frame with publications as rows and properties as columns.
# Names of the four properties recorded for every publication.
column.names <- c( "Status", "Title", "Info", "Type" )

# Flatten the loaded table into a single vector and refill it row by row, so
# that every four consecutive values become one publication (one row).
fields <- unlist( data.loaded )

data <-
  data.frame(
    matrix(
      data = fields,
      ncol = 4,
      byrow = TRUE,
      dimnames = list( NULL, column.names )
    ),
    stringsAsFactors = FALSE
  )

head( data )
##      Status
## 1 Published
## 2 Published
## 3 Published
## 4 Published
## 5 Published
## 6 Published
##                                                                                                                                                                        Title
## 1    Associations of hypoglycemia, glycemic variability and risk of cardiac arrhythmias in insulin-treated patients with type 2 diabetes: a prospective, observational study
## 2 The effectiveness of e-learning in patient education delivered to patients with rheumatoid arthritis: The WebRA study-protocol for a pragmatic randomised controlled trial
## 3                            The Association Between Cardiovascular Autonomic Function and Changes in Kidney and Myocardial Function in Type 2 Diabetes and Healthy Controls
## 4                                                                                                Cardiovascular Events with Finerenone in Kidney Disease and Type 2 Diabetes
## 5                                                                            Translation and validation of the Canadian assessment of physical literacy-2 in a Danish sample
## 6                                             Medical therapies for prevention of cardiovascular and renal events in patients with atrial fibrillation and diabetes mellitus
##                                                                                                                                                                                                                                                                                                                                                 Info
## 1                                                                                                            Andersen, A., Bagger, J. I., Sørensen, S. K., Baldassarre, M. P. A., Pedersen-Bjergaard, U., Forman, J. L., Gislason, G., Lindhardt, T. B., Knop, F. K. & Vilsbøll, T., 24 Dec 2021, In: Cardiovascular Diabetology. 20, 1, p. 241 241.
## 2                                                                                                                                                                                                                      Raunsbæk Knudsen, L., Lomborg, K., Ndosi, M., Hauge, E-M. & de Thurah, A., 20 Dec 2021, In: BMC Rheumatology. 5, 1, p. 57 57.
## 3                                                                                      Laursen, J. C., Rasmussen, I. K. B., Zobel, E. H., Hasbak, P., von Scholten, B. J., Holmvang, L., Ripa, R. S., Hansen, C. S., Frimodt-Moeller, M., Kjaer, A., Rossing, P. & Hansen, T. W., 13 Dec 2021, In: Frontiers in Endocrinology. 12, p. 780679 780679.
## 4                                                           Pitt, B., Filippatos, G., Agarwal, R., Anker, S. D., Bakris, G. L., Rossing, P., Joseph, A., Kolkhof, P., Nowack, C., Schloemer, P., Ruilope, L. M., FIGARO-DKD Investigators & Pedersen-Bjergaard, U., 9 Dec 2021, In: The New England journal of medicine. 385, 24, p. 2252-2263 12 p.
## 5                                                                                                                                                                                                      Elsborg, P., Melby, P. S., Kurtzhals, M., Tremblay, M. S., Nielsen, G. & Bentsen, P., 9 Dec 2021, In: BMC PUBLIC HEALTH. 21, 1, p. 2236 2236.
## 6 Fauchier, L., Boriani, G., de Groot, J. R., Kreutz, R., Rossing, P. & Camm, A. J., 7 Dec 2021, In: Europace : European pacing, arrhythmias, and cardiac electrophysiology : journal of the working groups on cardiac pacing, arrhythmias, and cardiac cellular electrophysiology of the European Society of Cardiology. 23, 12, p. 1873-1891 19 p.
##                                                                                  Type
## 1 Research output: Contribution to journal › Journal article › Research › peer-review
## 2 Research output: Contribution to journal › Journal article › Research › peer-review
## 3 Research output: Contribution to journal › Journal article › Research › peer-review
## 4 Research output: Contribution to journal › Journal article › Research › peer-review
## 5 Research output: Contribution to journal › Journal article › Research › peer-review
## 6 Research output: Contribution to journal › Journal article › Research › peer-review

Subset by Publication Type

  • Include journal articles and review articles in the analysis.
# Count the publications of each type.
table( data[[ "Type" ]] )
## 
##                                Research output: Contribution to journal › Comment/debate › Research › peer-review 
##                                                                                                                13 
##                Research output: Contribution to journal › Conference abstract in journal › Research › peer-review 
##                                                                                                                 6 
##                                     Research output: Contribution to journal › Editorial › Research › peer-review 
##                                                                                                                 3 
##                               Research output: Contribution to journal › Journal article › Research › peer-review 
##                                                                                                               278 
##                                        Research output: Contribution to journal › Letter › Research › peer-review 
##                                                                                                                 7 
##                                        Research output: Contribution to journal › Review › Research › peer-review 
##                                                                                                                32
# Flag the publications whose type text mentions a journal article or a
# review, then keep only those rows.
is.included <- grepl( pattern = "(Journal article)|(Review)", x = data[[ "Type" ]] )

data <- data[ is.included, ]

Create Additional Properties

  • Separate authors from the information column, which also contains the publication date and the journal reference.
# Split the info text once, at the first ", <day> <Mon> 2021, " date stamp
# (day and month are optional in the pattern). The part before the date is
# the author list; the part after it is the journal reference.
info.parts <-
  stringr::str_split_fixed(
    data[[ "Info" ]],
    pattern = "(\\,\\s([0-9]+\\s)?([A-Z][a-z][a-z]\\s)?2021\\,\\s)",
    n = 2
  )

tmp <-
  data.frame(
    Authors = info.parts[ , 1 ],
    Reference = info.parts[ , 2 ],
    stringsAsFactors = FALSE
  )

# Append the two new columns to the publication table.
data <- dplyr::bind_cols( data, tmp )

str( data )
## 'data.frame':    310 obs. of  6 variables:
##  $ Status   : chr  "Published" "Published" "Published" "Published" ...
##  $ Title    : chr  "Associations of hypoglycemia, glycemic variability and risk of cardiac arrhythmias in insulin-treated patients "| __truncated__ "The effectiveness of e-learning in patient education delivered to patients with rheumatoid arthritis: The WebRA"| __truncated__ "The Association Between Cardiovascular Autonomic Function and Changes in Kidney and Myocardial Function in Type"| __truncated__ "Cardiovascular Events with Finerenone in Kidney Disease and Type 2 Diabetes" ...
##  $ Info     : chr  "Andersen, A., Bagger, J. I., Sørensen, S. K., Baldassarre, M. P. A., Pedersen-Bjergaard, U., Forman, J. L., Gis"| __truncated__ "Raunsbæk Knudsen, L., Lomborg, K., Ndosi, M., Hauge, E-M. & de Thurah, A., 20 Dec 2021, In: BMC Rheumatology. 5, 1, p. 57 57." "Laursen, J. C., Rasmussen, I. K. B., Zobel, E. H., Hasbak, P., von Scholten, B. J., Holmvang, L., Ripa, R. S., "| __truncated__ "Pitt, B., Filippatos, G., Agarwal, R., Anker, S. D., Bakris, G. L., Rossing, P., Joseph, A., Kolkhof, P., Nowac"| __truncated__ ...
##  $ Type     : chr  "Research output: Contribution to journal › Journal article › Research › peer-review" "Research output: Contribution to journal › Journal article › Research › peer-review" "Research output: Contribution to journal › Journal article › Research › peer-review" "Research output: Contribution to journal › Journal article › Research › peer-review" ...
##  $ Authors  : chr  "Andersen, A., Bagger, J. I., Sørensen, S. K., Baldassarre, M. P. A., Pedersen-Bjergaard, U., Forman, J. L., Gis"| __truncated__ "Raunsbæk Knudsen, L., Lomborg, K., Ndosi, M., Hauge, E-M. & de Thurah, A." "Laursen, J. C., Rasmussen, I. K. B., Zobel, E. H., Hasbak, P., von Scholten, B. J., Holmvang, L., Ripa, R. S., "| __truncated__ "Pitt, B., Filippatos, G., Agarwal, R., Anker, S. D., Bakris, G. L., Rossing, P., Joseph, A., Kolkhof, P., Nowac"| __truncated__ ...
##  $ Reference: chr  "In: Cardiovascular Diabetology. 20, 1, p. 241 241." "In: BMC Rheumatology. 5, 1, p. 57 57." "In: Frontiers in Endocrinology. 12, p. 780679 780679." "In: The New England journal of medicine. 385, 24, p. 2252-2263 12 p." ...

Split Individual Authors

  • Separate individual authors, who are joined by comma or &-sign.
  • This creates a list in which each item corresponds to one publication and is a character vector containing the names of that publication's authors.
# Split every author string into the individual authors of the publication.
# Authors are separated either by ". , " (initial, comma, space) or by " & "
# before the last author. The period in front of " & " is optional, because a
# group author (e.g. "... Investigators & Pedersen-Bjergaard, U.") does not
# end with an initial and would otherwise stay glued to the following author.
# The result is a list with one character vector per publication.
authors <-
  stringr::str_split(
    string = data$"Authors",
    pattern = "(\\.\\, )|(\\.? \\& )"
  )

head( authors )
## [[1]]
##  [1] "Andersen, A"           "Bagger, J. I"          "Sørensen, S. K"       
##  [4] "Baldassarre, M. P. A"  "Pedersen-Bjergaard, U" "Forman, J. L"         
##  [7] "Gislason, G"           "Lindhardt, T. B"       "Knop, F. K"           
## [10] "Vilsbøll, T."         
## 
## [[2]]
## [1] "Raunsbæk Knudsen, L" "Lomborg, K"          "Ndosi, M"           
## [4] "Hauge, E-M"          "de Thurah, A."      
## 
## [[3]]
##  [1] "Laursen, J. C"      "Rasmussen, I. K. B" "Zobel, E. H"       
##  [4] "Hasbak, P"          "von Scholten, B. J" "Holmvang, L"       
##  [7] "Ripa, R. S"         "Hansen, C. S"       "Frimodt-Moeller, M"
## [10] "Kjaer, A"           "Rossing, P"         "Hansen, T. W."     
## 
## [[4]]
##  [1] "Pitt, B"                                          
##  [2] "Filippatos, G"                                    
##  [3] "Agarwal, R"                                       
##  [4] "Anker, S. D"                                      
##  [5] "Bakris, G. L"                                     
##  [6] "Rossing, P"                                       
##  [7] "Joseph, A"                                        
##  [8] "Kolkhof, P"                                       
##  [9] "Nowack, C"                                        
## [10] "Schloemer, P"                                     
## [11] "Ruilope, L. M"                                    
## [12] "FIGARO-DKD Investigators & Pedersen-Bjergaard, U."
## 
## [[5]]
## [1] "Elsborg, P"     "Melby, P. S"    "Kurtzhals, M"   "Tremblay, M. S"
## [5] "Nielsen, G"     "Bentsen, P."   
## 
## [[6]]
## [1] "Fauchier, L"    "Boriani, G"     "de Groot, J. R" "Kreutz, R"     
## [5] "Rossing, P"     "Camm, A. J."

Format Author Names

  • Remove superfluous punctuation and whitespace.
# Clean the author names of one publication:
#   1. drop commas,
#   2. drop "period + space" (this joins the initials),
#   3. drop any remaining periods,
#   4. trim and collapse whitespace.
clean.author.names <- function( x ) {
  x <- stringr::str_remove_all( string = x, pattern = "\\," )
  x <- stringr::str_remove_all( string = x, pattern = "\\. " )
  x <- stringr::str_remove_all( string = x, pattern = "\\." )
  x <- stringr::str_trim( string = x, side = "both" )
  stringr::str_squish( string = x )
}

# Apply the clean-up to every publication's author vector.
authors <- lapply( X = authors, FUN = clean.author.names )

head( authors )
## [[1]]
##  [1] "Andersen A"           "Bagger JI"            "Sørensen SK"         
##  [4] "Baldassarre MPA"      "Pedersen-Bjergaard U" "Forman JL"           
##  [7] "Gislason G"           "Lindhardt TB"         "Knop FK"             
## [10] "Vilsbøll T"          
## 
## [[2]]
## [1] "Raunsbæk Knudsen L" "Lomborg K"          "Ndosi M"           
## [4] "Hauge E-M"          "de Thurah A"       
## 
## [[3]]
##  [1] "Laursen JC"        "Rasmussen IKB"     "Zobel EH"         
##  [4] "Hasbak P"          "von Scholten BJ"   "Holmvang L"       
##  [7] "Ripa RS"           "Hansen CS"         "Frimodt-Moeller M"
## [10] "Kjaer A"           "Rossing P"         "Hansen TW"        
## 
## [[4]]
##  [1] "Pitt B"                                         
##  [2] "Filippatos G"                                   
##  [3] "Agarwal R"                                      
##  [4] "Anker SD"                                       
##  [5] "Bakris GL"                                      
##  [6] "Rossing P"                                      
##  [7] "Joseph A"                                       
##  [8] "Kolkhof P"                                      
##  [9] "Nowack C"                                       
## [10] "Schloemer P"                                    
## [11] "Ruilope LM"                                     
## [12] "FIGARO-DKD Investigators & Pedersen-Bjergaard U"
## 
## [[5]]
## [1] "Elsborg P"   "Melby PS"    "Kurtzhals M" "Tremblay MS" "Nielsen G"  
## [6] "Bentsen P"  
## 
## [[6]]
## [1] "Fauchier L"  "Boriani G"   "de Groot JR" "Kreutz R"    "Rossing P"  
## [6] "Camm AJ"

Extract unique authors

# Pool the author names of all publications, drop duplicates and sort them.
all.names <- unlist( authors )

authors.unique <- sort( unique( all.names ) )

head( authors.unique )
## [1] "'t Hart LM"       "Aaby P"           "Aadahl M"         "Aagaard-Hansen J"
## [5] "Aalborg GL"       "Aasbrenn M"

Network of Authors

Initialize the Adjacency Matrix

  • Authors-by-authors Adjacency matrix
# Square matrix of zeros with one row and one column per unique author; rows
# and columns are both labelled with the author names.
n.authors <- length( authors.unique )

adjacency.mat <-
  matrix(
    data = 0,
    nrow = n.authors,
    ncol = n.authors,
    dimnames = list( authors.unique, authors.unique )
  )

str( adjacency.mat )
##  num [1:2038, 1:2038] 0 0 0 0 0 0 0 0 0 0 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : chr [1:2038] "'t Hart LM" "Aaby P" "Aadahl M" "Aagaard-Hansen J" ...
##   ..$ : chr [1:2038] "'t Hart LM" "Aaby P" "Aadahl M" "Aagaard-Hansen J" ...
# Same overview, showing more leading elements per vector.
utils::str( adjacency.mat, vec.len = 5 )
##  num [1:2038, 1:2038] 0 0 0 0 0 0 0 0 0 0 0 0 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : chr [1:2038] "'t Hart LM" "Aaby P" "Aadahl M" "Aagaard-Hansen J" "Aalborg GL" ...
##   ..$ : chr [1:2038] "'t Hart LM" "Aaby P" "Aadahl M" "Aagaard-Hansen J" "Aalborg GL" ...

Compute the Co-Occurrence of Author Pairs

  • Go through all publications ‘i’.
  • Add up the co-occurrence count for the authors in the publication ‘i’.
# Walk through the publications and add one to every cell whose row author
# and column author both appear in the publication. The diagonal therefore
# accumulates the number of publications per author, and the off-diagonal
# cells the number of joint publications per author pair.
# seq_along() is used instead of 1:length() so that an empty author list
# results in zero iterations rather than the indices 1 and 0.
for ( i in seq_along( authors ) ) {
  
  is.coauthor <- rownames( adjacency.mat ) %in% authors[[ i ]]
  
  adjacency.mat[ is.coauthor, is.coauthor ] <-
    adjacency.mat[ is.coauthor, is.coauthor ] + 1
  
}

str( adjacency.mat )
##  num [1:2038, 1:2038] 2 0 0 0 0 0 0 0 0 0 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : chr [1:2038] "'t Hart LM" "Aaby P" "Aadahl M" "Aagaard-Hansen J" ...
##   ..$ : chr [1:2038] "'t Hart LM" "Aaby P" "Aadahl M" "Aagaard-Hansen J" ...

Subset Authors

  • Include only authors who have more than one publication.
# The diagonal of the adjacency matrix holds the number of publications per
# author; keep the authors with more than one.
is.included <- diag( adjacency.mat ) > 1

# drop = FALSE keeps the result a matrix with its dimnames even when only a
# single author qualifies; without it the subset would collapse to a bare
# number and the later rownames(), diag() and rowSums() calls would fail.
data.plot <- adjacency.mat[ is.included, is.included, drop = FALSE ]

str( data.plot )
##  num [1:468, 1:468] 2 0 0 0 0 0 2 0 2 0 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : chr [1:468] "'t Hart LM" "Aagaard-Hansen J" "Adam T" "Adam TC" ...
##   ..$ : chr [1:468] "'t Hart LM" "Aagaard-Hansen J" "Adam T" "Adam TC" ...

Wrap Author Names

  • Add line break upon whitespace.
  • Add line break upon dash.
# Replacement rules, applied one after the other to every author name:
#   1. whitespace becomes a line break,
#   2. a dash is followed by a line break,
#   3. a line break directly followed by a dash is removed.
wrap.rules <-
  list(
    c( "\\s", "\n" ),
    c( "\\-", "-\n" ),
    c( "\\n\\-", "" )
  )

wrapped <- rownames( data.plot )

for ( rule in wrap.rules ) {
  wrapped <-
    stringr::str_replace_all(
      string = wrapped,
      pattern = rule[ 1 ],
      replacement = rule[ 2 ]
    )
}

# Label each wrapped name with the original author name, so that it can be
# looked up by author later on.
names <- stats::setNames( wrapped, rownames( data.plot ) )

head( names )
##            't Hart LM      Aagaard-Hansen J                Adam T 
##        "'t\nHart\nLM" "Aagaard-\nHansen\nJ"             "Adam\nT" 
##               Adam TC               Afsar B             Agarwal R 
##            "Adam\nTC"            "Afsar\nB"          "Agarwal\nR"

Create a Network Representation of the Adjacency Matrix

  • Create a network representation of the adjacency matrix with the network package.
  • The network should be non-directional.
# Build an undirected network object from the authors-by-authors matrix.
net.network <- network::network( data.plot, directed = FALSE )

Lay Out the Network

  • Lay out the network on a two-dimensional plane (i.e., page) using the ggnetwork package.
# The layout algorithm starts from random vertex positions, so the node
# coordinates (and hence the figure) would differ on every run. Fix the seed
# of the random number generator to make the layout reproducible.
set.seed( 2021 )

# Compute the layout: the result holds node coordinates (x, y), the vertex
# name and the edge end points (xend, yend).
ggnetwork <- ggnetwork::ggnetwork( x = net.network )

head( ggnetwork )
##             x         y vertex.names        xend      yend
## 1 0.000000000 0.3325604     Mygind E 0.059466626 0.3103337
## 2 0.000000000 0.3325604     Mygind E 0.103467979 0.3401734
## 3 0.000000000 0.3325604     Mygind E 0.003035168 0.3527912
## 4 0.002999419 0.3112239     Mygind L 0.059466626 0.3103337
## 5 0.002999419 0.3112239     Mygind L 0.000000000 0.3325604
## 6 0.002999419 0.3112239     Mygind L 0.103467979 0.3401734

Define Additional Properties for the Visualization

  • Define properties of the network:
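    • Degree: weight of each node (here: number of publications for each author, read from the diagonal of the adjacency matrix; note that this is not the graph-theoretic degree, i.e., the number of edges at a node)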
    • Size: size of each node in the visualization (here: based on degree)
    • Label: Name of each node (here: author) to show in the visualization
    • Text: Additional information to show for each node on hover
      • Name of the node (here: author)
      • Degree of the node (here: number of publications)
      • Number of neighbors to the node (here: number of co-authors)
# Author name attached to every row of the layout table.
vertex <- ggnetwork$"vertex.names"

# Number of publications per author (diagonal of the co-occurrence matrix),
# looked up by author name.
publications <- diag( data.plot )

ggnetwork$"degree" <- publications[ vertex ]

# Node size in the plot is proportional to the number of publications.
ggnetwork$"size" <- 10 * ggnetwork$"degree"

# Line-wrapped author name used as the node label.
ggnetwork$"name.formatted" <- names[ vertex ]

# Number of co-authors among the plotted authors: non-zero cells in the
# author's row, minus one for the author's own diagonal cell.
neighbor.count <- rowSums( data.plot > 0 ) - 1

ggnetwork$"N.neighbors" <- neighbor.count[ vertex ]

# Hover text: author name, number of publications and number of co-authors.
ggnetwork$"text" <-
  paste0(
    vertex,
    ":\n\t",
    ggnetwork$"degree",
    " publications",
    "\n\t",
    ggnetwork$"N.neighbors",
    " co-authors"
  )

head( ggnetwork )
##             x         y vertex.names        xend      yend degree size
## 1 0.000000000 0.3325604     Mygind E 0.059466626 0.3103337      2   20
## 2 0.000000000 0.3325604     Mygind E 0.103467979 0.3401734      2   20
## 3 0.000000000 0.3325604     Mygind E 0.003035168 0.3527912      2   20
## 4 0.002999419 0.3112239     Mygind L 0.059466626 0.3103337      4   40
## 5 0.002999419 0.3112239     Mygind L 0.000000000 0.3325604      4   40
## 6 0.002999419 0.3112239     Mygind L 0.103467979 0.3401734      4   40
##   name.formatted N.neighbors                                      text
## 1      Mygind\nE           5 Mygind E:\n\t2 publications\n\t5 co-authors
## 2      Mygind\nE           5 Mygind E:\n\t2 publications\n\t5 co-authors
## 3      Mygind\nE           5 Mygind E:\n\t2 publications\n\t5 co-authors
## 4      Mygind\nL           7 Mygind L:\n\t4 publications\n\t7 co-authors
## 5      Mygind\nL           7 Mygind L:\n\t4 publications\n\t7 co-authors
## 6      Mygind\nL           7 Mygind L:\n\t4 publications\n\t7 co-authors

Create the Basic Visualization

  • Using the ggplot2 package with rendering of the nodes and edges from the ggnetwork package.
# Aesthetics shared by all layers: node position and edge end point.
basic.mapping <- ggplot2::aes( x = x, y = y, xend = xend, yend = yend )

# Start from an empty plot and add the edges first, then the nodes on top.
plot <- ggplot2::ggplot( data = ggnetwork, mapping = basic.mapping )
plot <- plot + ggnetwork::geom_edges()
plot <- plot + ggnetwork::geom_nodes()

plot

Create Visualization with Additional Properties

  • Add additional properties:
    • Text for tooltip (used later in the interactive version)
    • Formatted names of the nodes (here: authors)
    • Transparent edges
    • Node size according to the degree of the node (here: number of publications)
    • Simple white colour theme
# Aesthetics shared by all layers: node position, edge end point, node label
# and the hover text for the interactive version.
full.mapping <-
  ggplot2::aes(
    x = x,
    y = y,
    xend = xend,
    yend = yend,
    label = name.formatted,
    text = text
  )

plot <- ggplot2::ggplot( data = ggnetwork, mapping = full.mapping )

# Faint edges, so that the nodes stay readable.
plot <- plot + ggnetwork::geom_edges( alpha = 0.125 )

# Nodes scaled by the size column.
plot <- plot + ggnetwork::geom_nodes( mapping = ggplot2::aes( size = size ) )

# Semi-transparent author labels on the nodes.
plot <- plot + ggnetwork::geom_nodetext( alpha = 0.25, size = 3 )

# Plain theme.
plot <- plot + ggthemes::theme_solid()

plot

Create the Interactive Visualization

  • Create an interactive version of the visualization with the ggplotly function from the plotly package.
  • Show info text about an author on hover-over.
# Convert the static plot to an interactive widget; the tooltip shows only
# the "text" aesthetic.
plotly::ggplotly( plot, tooltip = "text" )

Appendix

SessionInfo

# Print the R version, platform, locale and loaded packages of this session.
utils::sessionInfo()
## R version 4.0.4 (2021-02-15)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19042)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.1252 
## [2] LC_CTYPE=English_United States.1252   
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.1252    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## loaded via a namespace (and not attached):
##  [1] sna_2.6              highr_0.9            pillar_1.6.2        
##  [4] compiler_4.0.4       jquerylib_0.1.4      tools_4.0.4         
##  [7] digest_0.6.27        viridisLite_0.4.0    jsonlite_1.7.2      
## [10] evaluate_0.14        lifecycle_1.0.0      tibble_3.0.6        
## [13] gtable_0.3.0         lattice_0.20-41      pkgconfig_2.0.3     
## [16] rlang_0.4.11         crosstalk_1.1.1      yaml_2.2.1          
## [19] xfun_0.27            coda_0.19-4          httr_1.4.2          
## [22] dplyr_1.0.4          stringr_1.4.0        knitr_1.34          
## [25] htmlwidgets_1.5.4    generics_0.1.0       vctrs_0.3.8         
## [28] grid_4.0.4           tidyselect_1.1.1     data.table_1.13.6   
## [31] glue_1.4.2           R6_2.5.1             plotly_4.9.4.1      
## [34] fansi_0.4.2          rmarkdown_2.11       tidyr_1.1.2         
## [37] farver_2.1.0         purrr_0.3.4          ggplot2_3.3.5       
## [40] magrittr_2.0.1       ggthemes_4.2.4       scales_1.1.1        
## [43] ellipsis_0.3.2       htmltools_0.5.1.1    colorspace_2.0-0    
## [46] labeling_0.4.2       utf8_1.1.4           stringi_1.5.3       
## [49] ggnetwork_0.5.10     network_1.17.1       lazyeval_0.2.2      
## [52] munsell_0.5.0        statnet.common_4.5.0 crayon_1.4.1